<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
  <head>
    <title>Tutorial on ustr APIs</title>

 <link rel="stylesheet" type="text/css" href="http://www.and.org/f_c-1.0.css">
  <style>
      A       { background: #FFF; color: #44F; }
      A:hover {                   color: #20b2aa; }

      A.anchor       { color: #000; }
      A.anchor:hover { color: #000; }

      P { text-indent: 2em; }

      body { background: #FFF; }

      tr.heading { background: #DDD; }
      tr.heading:hover { background: #ccc; }

      table.conv       { border-bottom: solid; margin-bottom: 2em; }

      table.conv tr.r1 { background: #eee; }
      table.conv tr.r2 { }
      table.conv tr.r1:hover { background: #ccc; }
      table.conv tr.r2:hover { background: #ccc; }

      table.fig1   { text-align: center; }

      td.ent1     { color: #080; }
      td.ent2     { color: #800; }

      span.blk        a.anc     {  display: none; }
      span.blk:hover  a.anc     {  display: inline; color: #d7f;   }

      span.lastmod              {  font-size: 50%; }
  </style>

  </head>

  <body>
    <h1>Tutorial on ustr APIs</h1>

<p>
 ustr is a string library designed to be very space efficient, and to integrate easily with "normal" C string handling code. This means that it is designed to allow the programmer to create the "strings" from allocated memory, the stack and read-only memory.
</p>

<p>
Also note that all error checking is included in every example. The examples might be somewhat easier to read without it ... however, including error checking is what the code must look like in a real application.
</p>

<ul>
<li><a href="#skel">Skel</a>: Skeleton template for tutorial snippets.</li>
<li><a href="#hw_c">Hello World - const</a>: A very simple hello world, using constant strings.</li>
<li><a href="#hw_d">Hello World - dup</a>: Create the string.</li>
<li><a href="#hw_m">Hello World - multiple</a>: Create the string from multiple functions.</li>
<li><a href="#hw_s">Hello World - stack</a>: Use the stack instead of the heap.</li>
<li><a href="#fgrep">fgrep</a>: Simple fgrep.</li>
<li><a href="#mkdir">mkdir</a>: Simple mkdir -p.</li>
<li><a href="#txt2html">Text to html converter</a>: Convert plain text into similar html.</li>
</ul>

<a class="anchor" id="skel">
<h2> Skeleton code required to run the tutorial examples </h2>
</a>

<p> This is the skeleton code required to run all the tutorial examples,
 <a href="http://www.and.org/ustr/src/examples/">the full versions</a> may
 require a few more headers though. </p>

<pre class="c2html">
#<span class="cppinclude">include</span> <span class="str">"ustr.h"</span>

#<span class="cppinclude">include</span> &lt;errno.h&gt;

<span class="static">static</span> <span class="void">void</span> die(<span class="const">const</span> <span class="char">char</span> *prog_name, <span class="const">const</span> <span class="char">char</span> *msg)
{
  fprintf(<span class="stderr">stderr</span>, <span class="str">"%s: %s\n"</span>, prog_name, msg);
  <span class="exit">exit</span> (<span class="exitfail">EXIT_FAILURE</span>);
}
</pre>

<a class="anchor" id="hw_c">
<h2> Hello World - const </h2>
</a>

<p> This example is the simplest possible, basically being the same as
 calling puts(). Note however that the length of the string is passed
 through to the IO function without any compiler magic by using the
 <a href="functions.html#USTR1_CHK">USTR1_CHK (checked
 constant ustr creation)</a> macro. If it looks a little weird at first, you
 can use the
 <a href="http://www.and.org/ustr/src/examples/custr.c">custr</a> example
 program to create constant ustr's automatically.
</p>

<p> Note: Although the USTR1_CHK() macro is handed constant data and should be
 able to determine at compile time whether the length check is correct, GCC
 doesn't think this is constant and so for file scope variables you'll need to
 use the <a href="functions.html#USTR1">USTR1 (non-checking constant ustr
 creation)</a> macro. Or even the simple
 <a href="functions.html#USTR">USTR("")</a> macro for empty ustr strings.</p>

<pre class="c2html">
<span class="comment">/* simplest, just "create" a Ustr from a constant string */</span>
<span class="static">static</span> <span class="void">void</span> hello_world_one(<span class="void">void</span>)
{
  Ustr *hello_world = USTR1_CHK(\xC, <span class="str">"Hello world!"</span>);

  <span class="if">if</span> (!ustr_io_putfileline(&amp;hello_world, <span class="stdout">stdout</span>))
    die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
}
</pre>

<a class="anchor" id="hw_d">
<h2> Hello World - dup </h2>
</a>

<p> This example creates the string before writing it out. Note that although
 the ustr is allocated, it doesn't need to be free'd because a default
 configured string of zero length is represented by "".</p>

<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_two(<span class="void">void</span>)
{ <span class="comment">/* next, create a Ustr from a printf() like format. This needs to be
   * free'd. */</span>
  Ustr *out = ustr_dup_fmt(<span class="str">"%s %s"</span>, <span class="str">"Hello"</span>, <span class="str">"world!"</span>);

  <span class="if">if</span> (!out || !ustr_io_putfileline(&amp;out, <span class="stdout">stdout</span>))
    die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
}
</pre>

<a class="anchor" id="hw_m">
<h2> Hello World - multiple parts</h2>
</a>

<p> This example creates the string using multiple functions. The interesting
part to notice is the call to ustr_enomem(), this checks if any allocation
failures have happened to this particular ustr. Also note that we create
a specifically configured ustr, which includes a size and a 4 byte reference
 count.</p>

<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_three(<span class="void">void</span>)
{ <span class="comment">/* manually create a Ustr, from multiple parts. Often "significantly faster"
   * than using ustr_*_fmt(), due to printf overhead. Still needs to allocate
   * memory, and maybe resize it. Still need to free it. */</span>
  Ustr *hello = USTR1(\5, <span class="str">"Hello"</span>);
  Ustr *sp    = USTR1(\1, <span class="str">" "</span>);
  Ustr *world = USTR1(\6, <span class="str">"world!"</span>);
  Ustr *out   = ustr_dupx_empty(1, 4, <span class="false">USTR_FALSE</span>, <span class="false">USTR_FALSE</span>);

  <span class="if">if</span> (!out)
    die(<span class="str">"hello_world"</span>, strerror(ENOMEM));
  
  ustr_add(&amp;out, hello);
  ustr_add(&amp;out, sp);
  ustr_add(&amp;out, world);
  
  <span class="if">if</span> (ustr_enomem(out)) <span class="comment">/* check all 3 additions at once */</span>
    die(<span class="str">"hello_world"</span>, strerror(ENOMEM));

  <span class="if">if</span> (!ustr_io_putfileline(&amp;out, <span class="stdout">stdout</span>))
    die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
  
  ustr_free(out);
}
</pre>

<a class="anchor" id="hw_s">
<h2> Hello World - stack based allocation </h2>
</a>

<p> This example also creates a string from multiple functions, however the
storage for the ustr is on the stack and so we don't need to de-allocate the
ustr (although, as with the constant ustr, it does no harm).</p>

<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_four(<span class="void">void</span>)
{ <span class="comment">/* manually create a Ustr, but use "auto" allocated storage
   * (stack instead of heap). As long as you don't use more than ustr_size()
   * you don't need to free. Also note that ustr_dup() will now always copy. */</span>
  Ustr *sp    = USTR1(\1, <span class="str">" "</span>);
  Ustr *world = USTR1(\6, <span class="str">"world!"</span>);
  <span class="char">char</span> buf_out[1024] = USTR_BEG_FIXED2 <span class="str">"Hello"</span>;
  Ustr *out = USTR_SC_INIT_AUTO(buf_out, <span class="true">USTR_TRUE</span>, 5);

  ustr_add(&amp;out, sp);
  ustr_add(&amp;out, world);

  <span class="comment">/* in this case we know !ustr_enomem() as there is more than enough space */</span>
  
  <span class="if">if</span> (!ustr_io_putfileline(&amp;out, <span class="stdout">stdout</span>))
    die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
  
  <span class="comment">/* ustr_free() not needed, because nothing was allocated.
   * Although it's often good to call it anyway, as it does no harm. */</span>
}
</pre>

<a class="anchor" id="fgrep">
<h2> fgrep </h2>
</a>

<p> This example works like GNU's fgrep --color. There are two main things to
 notice here. The first is that searching, or searching and replacing with a
 colourized variant, and reading lines are all just ustr API calls. The second
 is that the line data is allocated on the stack, by default.
</p>
<p> Both of these significantly improve the ease of use and speed of the
 resulting code, as ustr_io_getline() is significantly faster than fgets()
 (because fgets() does not return the length of the data it read) and
 allocating from the stack can be a huge performance win (note that all
 functions, including ustr_free(), do the correct thing ... so we don't suffer
 complexity for that performance).
 </p>

<pre class="c2html">
<span class="static">static</span> <span class="int">int</span> fgrep(Ustr **ps1, Ustr *fgrep_srch, Ustr *fgrep_repl,
                 <span class="int">int</span> first_only)
{
  <span class="sizet">size_t</span> num = <span class="num0">0</span>;

  <span class="if">if</span> (!fgrep_repl) <span class="comment">/* no replacement given, just search */</span>
    num = ustr_srch_fwd(*ps1, <span class="num0">0</span>, fgrep_srch);
  <span class="else">else</span> <span class="if">if</span> (!(num = ustr_replace(ps1, fgrep_srch, fgrep_repl,
                                !first_only)) &amp;&amp; <span class="errno">errno</span>)
    die(<span class="str">"fgrep"</span>, strerror(ENOMEM));
  
  <span class="if">if</span> (!num)
    ustr_sc_del(ps1);

  <span class="return">return</span> (!!num);
}

<span class="static">static</span> <span class="void">void</span> fgrep_fp_loop(FILE *in, 
                          Ustr *fgrep_srch, Ustr *fgrep_repl)
{
  <span class="char">char</span> buf[USTR_SIZE_FIXED(160)]; <span class="comment">/* enough for two "normal" lines,
                                     after that we alloc. */</span>
  Ustr *line = USTR_SC_INIT_AUTO(buf, <span class="false">USTR_FALSE</span>, <span class="num0">0</span>);
  
  <span class="while">while</span> (ustr_io_getline(&amp;line, in))
  {
    <span class="if">if</span> (!fgrep(&amp;line, fgrep_srch, fgrep_repl, <span class="false">USTR_FALSE</span>))
      ustr_sc_del(&amp;line);
    <span class="else">else</span> <span class="if">if</span> (!ustr_io_putfile(&amp;line, <span class="stdout">stdout</span>))
      die(<span class="str">"fgrep"</span>, strerror(<span class="errno">errno</span>));
    
    <span class="if">if</span> (line != USTR(buf)) <span class="comment">/* re-init */</span>
      ustr_sc_free2(&amp;line, USTR_SC_INIT_AUTO(buf, <span class="false">USTR_FALSE</span>, <span class="num0">0</span>));
  }
  <span class="if">if</span> (<span class="errno">errno</span>)
    die(<span class="str">"fgrep"</span>, strerror(<span class="errno">errno</span>));

  ustr_free(line);
}
</pre>

<a class="anchor" id="mkdir">
<h2> mkdir -p function</h2>
</a>

<p> This example shows how a single function can take both an allocated (and
 modifiable) ustr and a constant string ustr, to efficiently modify data.</p>

<pre class="c2html">
<span class="static">static</span> <span class="int">int</span> fu__mkdir_p(<span class="const">const</span> Ustr *s1, <span class="int">int</span> mode, <span class="sizet">size_t</span> off, <span class="int">int</span> ret)
{
  Ustr *allocd = USTR_NULL;
  <span class="char">char</span> *ptr = <span class="null">NULL</span>;

  <span class="if">if</span> (mkdir(ustr_cstr(s1), mode) != <span class="numm1">-1</span>)
    <span class="return">return</span> (ret + 1);

  <span class="switch">switch</span> (<span class="errno">errno</span>)
  {
    <span class="case">case</span> EEXIST: <span class="return">return</span> (ret);
      
    <span class="case">case</span> ENOENT: <span class="break">break</span>;
    
    <span class="default">default</span>:     <span class="return">return</span> (<span class="numm1">-1</span>);
  }

  <span class="if">if</span> ((off = ustr_srch_chr_rev(s1, off, <span class="chr">'/'</span>)) &lt;= 1)
  {
    <span class="errno">errno</span> = EINVAL;
    <span class="return">return</span> (<span class="numm1">-1</span>);
  }
  --off; <span class="comment">/* NOTE: offset moves from beg. to end */</span>
  
  <span class="if">if</span> (!ustr_owner(s1))
  { <span class="comment">/* do it this way, so we can pass constant Ustr's to this function
     * and don't use ustr_sc_ensure_owner() so that we don't release a
     * reference */</span>
    <span class="if">if</span> (!(allocd = ustr_dup_buf(ustr_cstr(s1), ustr_len(s1))))
      <span class="return">return</span> (<span class="numm1">-1</span>); <span class="comment">/* errno == ENOMEM, done by ustr */</span>
    s1 = allocd;
  }
  
  ptr = ustr_wstr((Ustr *)s1);
  ptr[off] = <span class="num0">0</span>;
  <span class="if">if</span> ((ret = fu__mkdir_p(s1, mode, ustr_len(s1) - off, ret + 1)) != <span class="numm1">-1</span>)
  {
    ptr[off] = <span class="chr">'/'</span>;
    <span class="if">if</span> (mkdir(ustr_cstr(s1), mode) == <span class="numm1">-1</span>)
      ret = <span class="numm1">-1</span>;
  }
  ustr_free(allocd);

  <span class="return">return</span> (ret);
}

<span class="comment">/* This returns -1, on error, or the number of directories created. */</span>
<span class="static">static</span> <span class="int">int</span> mkdir_p(<span class="const">const</span> Ustr *s1, <span class="int">int</span> mode)
{
  <span class="return">return</span> (fu__mkdir_p(s1, mode, <span class="num0">0</span>, <span class="num0">0</span>));
}
</pre>


<p> In other words:</p>

<pre class="c2html">
 mkdir_p(USTR1(\x9, <span class="str">"12/45/789"</span>), 0700)
</pre>

<p>...will create/free a single ustr for all 3 of the separate paths
 required.</p>

<a class="anchor" id="txt2html">
<h2> text into similar looking html converter</h2>
</a>

<p> This is a functional program showing how you can do the equivalent of the following perl code (written by tchrist@perl.com - Sunday, December 19th, 1999):</p>

<pre class="c2html">
<span class="comment">/* 
# first kill all the tabs
1 while s{ \t + }
         { " " x (length($&amp;)*8 - length($`)%8) }ex;

# then the four standard naughty bits 
s/&amp;/&amp;amp;/g;        # must remember to do this one first!
s/&lt;/&amp;lt;/g;         # this is the most important one
s/&gt;/&amp;gt;/g;         # don't close too early
s/"/&amp;quot;/g;       # only in embedded tags, i guess

# make lines break where they should
s/^\s*$/&lt;P&gt;/ || s/$/&lt;BR&gt;/;

# make sure spaces aren't squishticated so we
# can do indentation and properly align comments
s/( {2,})/'&amp;nbsp;' x length($1)/ge;
*/</span>
</pre>

<p> The interesting points here are how simple most of the ustr code is, tab
 conversion is 2 lines in the original perl and 5 (1 could be dropped for a
 speed loss) with ustr. The "four std. naughty bits" take a single line in
 both perl and ustr. The only major difference is in the final perl 1 liner,
 which is 8 interesting lines (due to the two or more spaces constraint, or
 we could just use <a href="functions.html#ustr_replace">ustr_replace()</a>).
</p>
<p> It's also worth noting that the ustr version is <b>significantly</b> faster
than the smaller perl code.
</p>

<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> txt2html(Ustr **pline)
{
  Ustr *line = *pline;
  <span class="sizet">size_t</span> tab_pos = <span class="num0">0</span>;
  <span class="sizet">size_t</span> tab_off = <span class="num0">0</span>;
  <span class="int">int</span> has_ret = <span class="num0">0</span>;
  
  <span class="comment">/* convert tabs to spaces */</span>
  <span class="while">while</span> ((tab_pos = ustr_srch_chr_fwd(line, tab_off, <span class="chr">'\t'</span>)))
  {
    <span class="sizet">size_t</span> tabs_len = ustr_spn_chr_fwd(line, tab_pos - 1, <span class="chr">'\t'</span>);
    <span class="sizet">size_t</span> spcs_len = (tabs_len * 8) - ((tab_pos - 1) % 8);
    
    ustr_sc_sub_rep_chr(&amp;line, tab_pos, tabs_len, <span class="chr">' '</span>, spcs_len);
    
    tab_off = tab_pos + spcs_len - 1;
  }

  <span class="if">if</span> (ustr_cstr(line)[ustr_len(line) - 1] == <span class="chr">'\n'</span>)
    has_ret = 1;
  
  <span class="if">if</span> (ustr_spn_chr_fwd(line, <span class="num0">0</span>, <span class="chr">' '</span>) == (ustr_len(line) - has_ret))
    ustr_set(&amp;line, USTR1(\3, <span class="str">"&lt;P&gt;"</span>)); <span class="comment">/* blank lines start new paragraph */</span>
  <span class="else">else</span>
  {
    <span class="sizet">size_t</span> spcs_off = <span class="num0">0</span>;
    <span class="sizet">size_t</span> spcs_pos = <span class="num0">0</span>;
    <span class="char">char</span> buf_rep[USTR_SIZE_FIXED(40 * 6)] = USTR_BEG_FIXED2 <span class="str">"&amp;nbsp;"</span>;
    
    ustr_replace_cstr(&amp;line, <span class="str">"&amp;"</span>,  <span class="str">"&amp;amp;"</span>,  <span class="num0">0</span>);
    ustr_replace_cstr(&amp;line, <span class="str">"&lt;"</span>,  <span class="str">"&amp;lt;"</span>,   <span class="num0">0</span>);
    ustr_replace_cstr(&amp;line, <span class="str">"&gt;"</span>,  <span class="str">"&amp;gt;"</span>,   <span class="num0">0</span>);
    ustr_replace_cstr(&amp;line, <span class="str">"\""</span>, <span class="str">"&amp;quot;"</span>, <span class="num0">0</span>);
    ustr_del(&amp;line, has_ret);
    ustr_add_cstr(&amp;line, <span class="str">"&lt;BR&gt;\n"</span>);

    <span class="comment">/* convert runs of two or more spaces into runs of &amp;nbsp; */</span>
    <span class="while">while</span> ((spcs_pos = ustr_srch_cstr_fwd(line, spcs_off, <span class="str">"  "</span>)))
    {
      <span class="sizet">size_t</span> spcs_len = ustr_spn_chr_fwd(line, spcs_pos - 1, <span class="chr">' '</span>);
      <span class="sizet">size_t</span> rep = spcs_len;
      <span class="sizet">size_t</span> more = <span class="num0">0</span>;
      Ustr *rep_nbsp = USTR_SC_INIT_AUTO(buf_rep, <span class="false">USTR_FALSE</span>, <span class="num0">0</span>);

      <span class="while">while</span> (rep--)
        ustr_add_cstr(&amp;rep_nbsp, <span class="str">"&amp;nbsp;"</span>);

      <span class="if">if</span> (ustr_enomem(rep_nbsp) ||
          !ustr_sc_sub(&amp;line, spcs_pos, spcs_len, rep_nbsp))
        die(prog_name, strerror(ENOMEM));

      spcs_off = spcs_pos + (spcs_len * strlen(<span class="str">"&amp;nbsp;"</span>)) - 1;

      ustr_free(rep_nbsp);
    }
  }
  
  <span class="if">if</span> (ustr_enomem(line))
    die(<span class="str">"txt2html"</span>, strerror(<span class="errno">errno</span>));

  *pline = line;
}
</pre>

    <hr>
    <address><a href="mailto:james@and.org">James Antill</a></address>
<!-- Created: Sat Oct 13 16:41:12 EDT 2007 -->
<!-- hhmts start -->
Last modified: Tue Oct 30 01:20:07 EDT 2007
<!-- hhmts end -->
  </body>
</html>
